import requests
import json
import pandas as pd
from tqdm import tqdm

# Parallel accumulators: the scraping loop appends one entry to each list per
# accepted comment, so index k across all four lists describes one comment.
userNames, commentDetails, commentTimes, ipZones = [], [], [], []

# Number of result pages requested from the comment API.
total_pages = 15

# Endpoint of the comment-list API. It never changes between pages, so it is
# defined once instead of being rebuilt on every iteration.
postUrl = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"

# Request every page of comments and append the fields of each usable item to
# the module-level accumulator lists (userNames, commentDetails, commentTimes,
# ipZones). A page that fails to download or parse is reported and skipped so
# that the comments collected from the other pages are still exported.
# NOTE(review): pageIndex starts at 0 here — confirm whether the API expects
# 0-based or 1-based page numbers (a 1-based API may return page 1 twice).
for pagen in tqdm(range(total_pages), desc='爬取进度', unit='页'):
    # NOTE(review): the keys "collapseTpte" and "sourseType" look like typos of
    # "collapseType" / "sourceType"; they are left untouched because the field
    # names the server actually honours cannot be verified from this file.
    payload = {
        "arg": {
            "channelType": 2,
            "collapseTpte": 0,
            "commentTagId": 0,
            "pageSize": 50,
            # Change this to the id of the attraction to scrape; every other
            # field can be copied as-is.
            "poiId": 76342,
            "sourseType": 1,
            "sortType": 3,
            "pageIndex": pagen,
            "starType": 0,
        },
        "head": {
            "cid": "09031062417234242897",
            "ctok": "",
            "cver": "1.0",
            "lang": "01",
            "sid": "888",
            "syscode": "09",
            "auth": "",
            "xsid": "",
            "extension": [],
        },
    }

    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.post(postUrl, data=json.dumps(payload), timeout=10)
        # Turn HTTP 4xx/5xx answers into exceptions instead of parsing an
        # error page as if it were comment data.
        response.raise_for_status()
        html_1 = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON. tqdm.write prints
        # the message without corrupting the progress bar.
        tqdm.write(f"page {pagen}: request failed ({exc}); skipping")
        continue

    # "result" may be absent or null in the response; treat both as "no data".
    result = html_1.get("result") if isinstance(html_1, dict) else None
    if not isinstance(result, dict):
        continue

    # "items" may likewise be absent or null.
    commentItems = result.get("items") or []

    for item in commentItems:
        # Skip null placeholders in the list.
        if not isinstance(item, dict):
            continue

        # Only keep comments that carry a user nickname, as before; a null
        # userInfo no longer raises on the membership test.
        user_info = item.get('userInfo')
        if not isinstance(user_info, dict) or 'userNick' not in user_info:
            continue

        # Keep only the text before the first space of the publish tag; a
        # missing or null tag yields an empty string instead of an exception.
        publish_tag = item.get('publishTypeTag') or ''

        # Append to all four lists together so they stay the same length.
        userNames.append(user_info['userNick'])
        commentDetails.append(item.get('content'))
        commentTimes.append(publish_tag.split(' ')[0])
        ipZones.append(item.get('ipLocatedName'))

# Assemble the collected fields into a table. Each key is the column header
# written to the spreadsheet; insertion order fixes the column order.
columns = {
    '用户评论内容': commentDetails,
    '用户名': userNames,
    '用户评论时间': commentTimes,
    '用户属地': ipZones,
}
df = pd.DataFrame(data=columns)

# Export to an Excel workbook, omitting the numeric row index.
output_path = '只爬黄龙溪评论1223url.xlsx'
df.to_excel(output_path, index=False)
